# AMAR ET AL. - COUNTERING MISINFORMATION EARLY (2025)
## REPLICATION FILE: 21_robust_follow_up_order.R
### This script analyzes the impact of follow-up timing on guardian ITTs.
# ----
# Function ----
#' Estimate ITT regressions separately within each level of a covariate
#'
#' Drops observations whose `covariate` is missing, then, for every distinct
#' remaining value of `covariate`, subsets the design to that value and fits
#' `varname ~ treatment + library_spillover_pre` with `svyglm()`.
#'
#' @param varname Name of the dependent variable, as a string; it is pasted
#'   into the model formula.
#' @param covariate Name of the column in `design$variables` that defines the
#'   subgroups.
#' @param design Design object exposing `$variables`, supporting row
#'   subsetting with `[`, and accepted by `svyglm()`.
#' @return A data frame stacking the `tidy()` coefficient rows of every
#'   subgroup model, preceded by the columns `dv` (the value of `varname`),
#'   `n` (`nobs()` of the subgroup model) and `covariate_level`.
tab_regression_subgroup_ITT <- function(varname, covariate, design) {
  # Fail early with a clear message: a missing column would otherwise yield
  # NULL here and the problem would only surface further downstream.
  if (is.null(design$variables[[covariate]])) {
    stop("Column '", covariate, "' not found in design$variables.",
         call. = FALSE)
  }

  # Observations with a missing covariate cannot be assigned to a subgroup
  design <- design[!is.na(design$variables[[covariate]]), ]

  # Distinct values of the subgroup covariate
  covariate_levels <- unique(design$variables[[covariate]])

  # The model specification does not depend on the subgroup, so build it once
  fml <- as.formula(paste(varname, "~ treatment + library_spillover_pre"))

  # Collect one tidy coefficient table per level and bind them once at the
  # end instead of re-copying a growing data frame on every iteration
  fits <- vector("list", length(covariate_levels))
  i <- 0L
  for (level in covariate_levels) {
    i <- i + 1L

    # Restrict the design to the current level and fit the model
    design_sub <- design[design$variables[[covariate]] == level, ]
    mod <- svyglm(fml, design = design_sub)

    # Prepend the identifying columns to the coefficient table
    fits[[i]] <- tidy(mod) %>%
      mutate(dv = varname, n = nobs(mod), covariate_level = level, .before = 1)
  }

  # Starting from an empty data.frame mirrors the previous accumulator, so a
  # design with no levels still yields an empty data frame
  bind_rows(data.frame(), fits)
}

# Run regressions ----
# Fit the subgroup models for every guardian outcome whose subidx is missing,
# splitting the sample by follow_time_student_first.
subgroup_guardian_itt_follow_up_timing <- lapply(
  guardian_dvs$dv[is.na(guardian_dvs$subidx)],
  function(outcome) {
    tab_regression_subgroup_ITT(
      outcome, "follow_time_student_first", bimli_svy_dkr.rm
    )
  }
) %>%
  # Stack the per-outcome tables
  bind_rows() %>%
  # Drop the intercept and the library_spillover_pre control terms
  filter(term != "(Intercept)", !grepl("library_spillover_pre", term)) %>%
  # 95% confidence bounds from the normal approximation
  mutate(
    margin = qnorm(0.975) * std.error,
    lower = estimate - margin,
    upper = estimate + margin
  )

# Attach the outcome metadata held in guardian_dvs
subgroup_guardian_itt_follow_up_timing <- guardian_dvs %>%
  left_join(subgroup_guardian_itt_follow_up_timing, by = "dv")

# Plot ----
# Coefficient plot of the guardian ITT estimates, one point and 95% interval
# per outcome and interview order, faceted by idx.
subgroup_guardian_itt_follow_up_timing %>%
  filter(is.na(subidx) & secondary_outcome == FALSE & mechanism == FALSE, follow == TRUE) %>%
  mutate(
    # Collapse every label containing "Index" to a single "Index" label
    label = if_else(str_detect(label, "Index"), "Index", label),
    label = as_factor(label),
    idx = as_factor(idx),
    is.idx = label == "Index",
    significance = ifelse(p.value < 0.05, "YES", "NO"),
    covariate_level = recode_factor(
      covariate_level,
      `0` = "Guardian interviewed first",
      `1` = "Student interviewed first"
    )
  ) %>%
  ggplot(aes(x = estimate, y = fct_rev(label), color = covariate_level,
             group = covariate_level, linetype = significance)) +
  geom_vline(xintercept = 0, lty = 2) +
  facet_grid(rows = "idx", scales = "free_y", space = "free_y", switch = "y") + 
  geom_point(position = position_dodge(width = 0.5)) +  
  geom_errorbar(aes(xmin = lower, xmax = upper), width = 0, position = position_dodge(width = 0.5)) +
  xlab("Guardian ITTs by Follow-Up Timing") +
  scale_y_discrete(position = "right") +
  # Name the values so the mapping does not depend on which significance
  # levels happen to be present: with positional values, data containing
  # only "YES" would be drawn dashed.
  scale_linetype_manual(values = c("NO" = "dashed", "YES" = "solid")) +
  theme_bw() %+replace%
  theme(axis.title.y = element_blank(),
        strip.text.y.left = element_text(angle = 0),
        legend.position = "bottom") + 
  guides(linetype = "none") +  
  labs(color = "Interview Order") +
  scale_color_manual(values = rev(c("#2066a8", "#ea801c")))

# No plot argument is given, so ggsave() writes the most recent ggplot
ggsave("output/figures/subgroup_guardian_itt_follow_up_timing.pdf",  height = 4, width = 8)

